In [1]:
import sdm as sdmlib
import matplotlib.pyplot as plt
import time
from collections import defaultdict, OrderedDict
from IPython.display import clear_output
%matplotlib inline
In [43]:
class Timer(object):
    """Collect wall-clock timings for named benchmark sections.

    Call ``start(key, name)``, optionally ``measure_loop()`` after every
    iteration, then ``end()``.  Finished measurements are kept in ``stats``
    (an ``OrderedDict`` keyed by ``key``) as dicts with these entries:

    * ``'name'`` -- the label given to ``start``;
    * ``'dt'`` -- seconds elapsed between ``start`` and ``end``;
    * ``'number-of-loops'`` -- how many times ``measure_loop`` was called;
    * ``'loops-per-second'`` -- ``number-of-loops / dt``;
    * ``'loops'`` -- duration in seconds of each individual loop.
    """

    def __init__(self):
        self.stats = OrderedDict()
        self.current_stats = None
        self.t0 = None

    def start(self, key, name):
        """Begin a measurement stored under ``key`` and labelled ``name``.

        Starting a key that already exists replaces its previous entry.
        """
        # Read the clock once so the stored start time and ``self.t0`` agree.
        now = time.time()
        self.current_stats = {
            'name': name,
            't0_loops': [],
            't0': now,
        }
        self.stats[key] = self.current_stats
        self.t0 = now

    def measure_loop(self):
        """Record the end of one loop iteration of the running measurement."""
        self.current_stats['t0_loops'].append(time.time())

    def end(self):
        """Finish the running measurement and compute its statistics.

        Raises:
            RuntimeError: if no measurement is in progress.
        """
        d = self.current_stats
        if d is None:
            raise RuntimeError('Timer.end() called without a matching start()')

        now = time.time()
        started = d.pop('t0')
        marks = d.pop('t0_loops')
        d['dt'] = now - started

        # The loop statistics are always present, even when measure_loop()
        # was never called, so that consumers can read them unconditionally.
        count = len(marks)
        d['number-of-loops'] = count
        if count == 0:
            d['loops-per-second'] = 0.0
        elif d['dt'] > 0:
            d['loops-per-second'] = count / d['dt']
        else:
            # The clock did not advance between start() and end().
            d['loops-per-second'] = float('inf')

        # Each loop lasts from the previous mark (or the start) to its own mark.
        d['loops'] = [
            later - earlier
            for earlier, later in zip([started] + marks[:-1], marks)
        ]
        self.current_stats = None
In [3]:
def _run_scan_benchmark(timer, key, name, message, scan, bits, radius, n):
    """Time ``n`` calls of ``scan(bitstring, radius)`` and store them under ``key``."""
    # Print before starting the timer so the progress message is never
    # counted as part of the first measured loop.
    print(message)
    timer.start(key, name)
    for _ in range(n):
        # NOTE: drawing the random bitstring is part of each measured loop.
        bs = sdmlib.Bitstring.init_random(bits)
        scan(bs, radius)
        timer.measure_loop()
    timer.end()


def run_part1(bits, sample, radius, nlinear=1000, nthread=5000, nopencl=5000):
    """Benchmark the linear, thread and OpenCL scans of one address space.

    A random address space is created with ``AddressSpace.init_random(bits,
    sample)``; every loop scans it around a fresh random bitstring with the
    given ``radius``.

    Args:
        bits: passed to ``AddressSpace.init_random`` and ``Bitstring.init_random``.
        sample: passed to ``AddressSpace.init_random``.
        radius: passed to every scan call.
        nlinear: number of ``scan_linear2`` calls to time.
        nthread: number of ``scan_thread2`` calls to time.
        nopencl: number of ``scan_opencl2`` calls to time.

    Returns:
        A ``Timer`` whose ``stats`` has the keys ``'scan_linear'``,
        ``'scan_thread'`` and ``'scan_opencl'``.
    """
    address_space = sdmlib.AddressSpace.init_random(bits, sample)
    address_space.opencl_init()
    timer = Timer()

    benchmarks = [
        ('scan_linear', 'Linear scan', 'Running linear scan...',
         address_space.scan_linear2, nlinear),
        ('scan_thread', 'Thread scan', 'Running thread scan...',
         address_space.scan_thread2, nthread),
        ('scan_opencl', 'OpenCL scan', 'Running OpenCL scan...',
         address_space.scan_opencl2, nopencl),
    ]
    for key, name, message, scan, n in benchmarks:
        _run_scan_benchmark(timer, key, name, message, scan, bits, radius, n)
    return timer
In [1]:
def results_part1(bits, timer):
    """Report the scan benchmark collected by ``run_part1``.

    Displays a Markdown table, prints the same rows as LaTeX table cells and
    draws one histogram of per-scan durations (in milliseconds) for the
    linear, thread and OpenCL scans.

    Args:
        bits: number shown in the figure title.
        timer: ``Timer`` holding the ``'scan_linear'``, ``'scan_thread'`` and
            ``'scan_opencl'`` statistics.
    """
    from IPython.display import display, Markdown

    keys = ['scan_linear', 'scan_thread', 'scan_opencl']
    header = ['', 'Loops', 'Total time', 'Scans per second', 'Time per scan (ms)']
    rows = []
    for key in keys:
        stats = timer.stats[key]
        rows.append([
            stats['name'],
            stats['number-of-loops'],
            stats['dt'],
            stats['loops-per-second'],
            1000.0 / stats['loops-per-second'],
        ])

    # Markdown needs an alignment row right after the header.
    markdown_rows = [header, ['---', '---:', '---:', '---:', '---:']] + rows
    txt = '\n'.join(['|'.join([str(x) for x in row]) for row in markdown_rows])
    display(Markdown(txt))

    # LaTeX rows end with a literal double backslash; a raw string keeps both
    # backslashes and avoids the invalid "\h" escape.  The Markdown alignment
    # row is meaningless in LaTeX and is therefore left out.
    row_end = r'\\ \hfill' + '\n'
    latex = row_end.join([' & '.join([str(x) for x in row]) for row in [header] + rows])
    print(latex)

    plt.figure(figsize=(8, 6), dpi=300)
    for key in keys:
        stats = timer.stats[key]
        durations_ms = [1000 * x for x in stats['loops']]
        plt.hist(durations_ms, bins='fd', density=True, alpha=0.9, label=stats['name'])
    plt.legend()
    plt.title('Scan performance ($n={}$ bits)'.format(bits))
    plt.xlabel('Scan duration (ms)')
    plt.ylabel('Probability')
    # No plt.show() here: callers adjust the axes (e.g. plt.xlim) afterwards.
In [5]:
# Part 1 scan benchmark: 1000 bits, sample of 1000000, radius 451, default loop counts.
timer1000 = run_part1(bits=1000, sample=1000000, radius=451)
In [2]:
# Table and histogram for the 1000-bit scan benchmark.
results_part1(bits=1000, timer=timer1000)
In [7]:
# Part 1 scan benchmark: 256 bits, sample of 1000000, radius 103, default loop counts.
timer256 = run_part1(bits=256, sample=1000000, radius=103)
In [3]:
# Table and histogram for the 256-bit scan benchmark, zoomed to 2.4-7.5 ms.
results_part1(bits=256, timer=timer256)
plt.xlim(2.4, 7.5);
In [9]:
# Part 1 scan benchmark: 10000 bits, sample of 1000000, radius 4845, reduced loop counts.
timer10k = run_part1(
    bits=10000,
    sample=1000000,
    radius=4845,
    nlinear=100,
    nthread=500,
    nopencl=1000,
)
In [14]:
# Table and histogram for the 10000-bit scan benchmark, zoomed to 7-150 ms.
results_part1(bits=10000, timer=timer10k)
plt.xlim(7, 150);
In [18]:
def read_write(key, name, timer, bits, sample, radius, scanner_type, n):
    """Time ``n`` writes followed by ``n`` reads on a freshly built SDM.

    The memory is built from a random address space and a zeroed counter.
    Writes are recorded in ``timer`` under ``'write_<key>'`` and reads under
    ``'read_<key>'``; every operation uses a new random bitstring.

    Returns:
        The ``timer`` that was passed in.
    """
    # Keep the address space and the counter in named locals for the whole call.
    space = sdmlib.AddressSpace.init_random(bits, sample)
    counters = sdmlib.Counter.init_zero(bits, sample)
    memory = sdmlib.SDM(space, counters, radius, scanner_type)

    # Write phase: each random bitstring is stored at its own address.
    write_key = 'write_{}'.format(key)
    write_label = '{} write'.format(name)
    timer.start(write_key, write_label)
    for _iteration in range(n):
        pattern = sdmlib.Bitstring.init_random(bits)
        memory.write(pattern, pattern)
        timer.measure_loop()
    timer.end()

    # Read phase: one read per random bitstring.
    read_key = 'read_{}'.format(key)
    read_label = '{} single read'.format(name)
    timer.start(read_key, read_label)
    for _iteration in range(n):
        probe = sdmlib.Bitstring.init_random(bits)
        memory.read(probe)
        timer.measure_loop()
    timer.end()

    return timer
In [23]:
def run_part2(bits, sample, radius, nthread=1000, nopencl=1000):
    """Benchmark SDM writes and reads with the thread and OpenCL scanners.

    Args:
        bits, sample, radius: forwarded to ``read_write``.
        nthread: number of writes (and of reads) with ``SDM_SCANNER_THREAD``.
        nopencl: number of writes (and of reads) with ``SDM_SCANNER_OPENCL``.

    Returns:
        A ``Timer`` with the keys ``'write_thread'``, ``'read_thread'``,
        ``'write_opencl'`` and ``'read_opencl'``.
    """
    timer = Timer()
    # print() with a single argument behaves the same on Python 2 and 3.
    print('Running thread...')
    read_write('thread', 'Thread', timer, bits, sample, radius, sdmlib.SDM_SCANNER_THREAD, n=nthread)
    print('Running OpenCL...')
    read_write('opencl', 'OpenCL', timer, bits, sample, radius, sdmlib.SDM_SCANNER_OPENCL, n=nopencl)
    return timer
In [65]:
def results_part2(bits, timer, hist_xlim=None):
    """Report the read/write benchmark collected by ``run_part2``.

    Displays a Markdown table with one row per entry of ``timer.stats`` and
    draws two histograms of per-operation durations in milliseconds: one for
    the writes and one for the reads.

    Args:
        bits: number shown in the figure titles.
        timer: ``Timer`` holding the ``'write_thread'``, ``'write_opencl'``,
            ``'read_thread'`` and ``'read_opencl'`` statistics.
        hist_xlim: optional ``(left, right)`` x-axis limits applied to both
            histograms.
    """
    from IPython.display import display, Markdown

    v = [['', 'Loops', 'Total time', 'Operation per second', 'Time per operation (ms)']]
    v.append(['---', '---:', '---:', '---:', '---:'])
    for key in timer.stats.keys():
        stats = timer.stats[key]
        v.append([
            stats['name'],
            stats['number-of-loops'],
            stats['dt'],
            stats['loops-per-second'],
            1000.0 / stats['loops-per-second'],
        ])
    txt = '\n'.join(['|'.join([str(x) for x in row]) for row in v])
    display(Markdown(txt))

    def plot_durations(keys, title):
        # One figure with an overlaid histogram per statistics key.
        plt.figure(figsize=(8, 6), dpi=300)
        for key in keys:
            stats = timer.stats[key]
            plt.hist([1000 * x for x in stats['loops']], bins='fd', density=True, alpha=0.9, label=stats['name'])
        plt.legend()
        plt.title(title)
        # These are read/write operations, matching the table header above.
        plt.xlabel('Operation duration (ms)')
        plt.ylabel('Probability')
        if hist_xlim:
            plt.xlim(*hist_xlim)

    plot_durations(['write_thread', 'write_opencl'], 'Write performance ($n={}$ bits)'.format(bits))
    plt.show()
    plot_durations(['read_thread', 'read_opencl'], 'Read performance ($n={}$ bits)'.format(bits))
In [40]:
# Part 2 read/write benchmark: 1000 bits, sample of 1000000, radius 451.
p2timer1000 = run_part2(
    bits=1000,
    sample=1000000,
    radius=451,
    nthread=1000,
    nopencl=1000,
)
In [67]:
# Table and histograms for the 1000-bit read/write benchmark (x axis 0-23 ms).
results_part2(bits=1000, timer=p2timer1000, hist_xlim=(0, 23))
In [53]:
# Part 2 read/write benchmark: 256 bits, sample of 1000000, radius 103.
p2timer256 = run_part2(
    bits=256,
    sample=1000000,
    radius=103,
    nthread=2000,
    nopencl=2000,
)
In [61]:
# Table and histograms for the 256-bit read/write benchmark (x axis 0-8 ms).
results_part2(bits=256, timer=p2timer256, hist_xlim=(0, 8))
In [ ]:
#p2timer10k = run_part2(10000, 1000000, 4845, nthread=500, nopencl=1000)
In [ ]:
#results_part2(10000, p2timer10k, hist_xlim=(0, 8))
In [57]:
def run_part3(bits, sample, radius, n=1000):
    """Benchmark every OpenCL kernel listed in ``sdmlib.OPENCL_KERNEL_NAMES``.

    All kernels scan the same random address space around the same random
    bitstring.  Before a kernel is timed, one untimed scan is compared with a
    reference result obtained before any kernel was selected; a mismatch is
    reported with a warning and the benchmark continues.  That untimed scan
    also keeps the first scan after a kernel switch out of the measurements.

    Args:
        bits: passed to ``AddressSpace.init_random`` and ``Bitstring.init_random``.
        sample: passed to ``AddressSpace.init_random``.
        radius: passed to every ``scan_opencl2`` call.
        n: number of timed scans per kernel.

    Returns:
        A ``Timer`` with one entry per kernel, keyed by the kernel name.
    """
    address_space = sdmlib.AddressSpace.init_random(bits, sample)
    address_space.opencl_init()
    #address_space.opencl_opts.verbose = 1
    bs = sdmlib.Bitstring.init_random(bits)
    # Reference result, computed before any kernel is explicitly selected.
    expected = set(address_space.scan_opencl2(bs, radius))
    timer = Timer()
    for kernel in sdmlib.OPENCL_KERNEL_NAMES:
        address_space.set_opencl_kernel(kernel)
        print('Running kernel {}...'.format(kernel))
        # Untimed check so a kernel returning a different result is noticed.
        found = set(address_space.scan_opencl2(bs, radius))
        if found != expected:
            print('WARNING: kernel {} returned a different result than the reference scan'.format(kernel))
        timer.start(kernel, kernel)
        for _ in range(n):
            address_space.scan_opencl2(bs, radius)
            timer.measure_loop()
        timer.end()
    return timer
In [70]:
def results_part3(bits, timer, hist_xlim=None):
    """Report the kernel benchmark stored in ``timer``.

    Shows a Markdown table with one row per entry of ``timer.stats`` and then
    a single figure overlaying the per-scan duration histograms (in
    milliseconds) of every entry.

    Args:
        bits: number shown in the figure title.
        timer: ``Timer`` whose ``stats`` entries are reported.
        hist_xlim: optional ``(left, right)`` x-axis limits for the figure.
    """
    from IPython.display import display, Markdown

    table = [
        ['', 'Loops', 'Total time', 'Scans per second', 'Time per scan (ms)'],
        [':---', '---:', '---:', '---:', '---:'],
    ]
    for entry in timer.stats.values():
        table.append([
            entry['name'],
            entry['number-of-loops'],
            entry['dt'],
            entry['loops-per-second'],
            1000.0 / entry['loops-per-second'],
        ])

    lines = []
    for row in table:
        lines.append('|'.join(map(str, row)))
    display(Markdown('\n'.join(lines)))

    plt.figure(figsize=(8, 6), dpi=300)
    for entry in timer.stats.values():
        # Loop durations are stored in seconds; plot them in milliseconds.
        durations_ms = [1000 * seconds for seconds in entry['loops']]
        plt.hist(durations_ms, bins='fd', density=True, alpha=0.75, label=entry['name'])
    plt.legend()
    plt.title('Kernels performance ($n={}$ bits)'.format(bits))
    plt.xlabel('Scan duration (ms)')
    plt.ylabel('Probability')
    if hist_xlim:
        plt.xlim(*hist_xlim)
    plt.show()
In [58]:
# Part 3 kernel benchmark: 1000 bits, sample of 1000000, radius 451, 3000 scans per kernel.
p3timer1000 = run_part3(bits=1000, sample=1000000, radius=451, n=3000)
In [79]:
# Table and histogram for the 1000-bit kernel benchmark (x axis 2-7 ms).
results_part3(bits=1000, timer=p3timer1000, hist_xlim=(2, 7))
In [72]:
# Part 3 kernel benchmark: 256 bits, sample of 1000000, radius 103, 3000 scans per kernel.
p3timer256 = run_part3(bits=256, sample=1000000, radius=103, n=3000)
In [75]:
# Table and histogram for the 256-bit kernel benchmark (x axis 2-7 ms).
results_part3(bits=256, timer=p3timer256, hist_xlim=(2, 7))
In [76]:
# Part 3 kernel benchmark: 10000 bits, sample of 1000000, radius 4845, 500 scans per kernel.
p3timer10k = run_part3(bits=10000, sample=1000000, radius=4845, n=500)
In [82]:
# Table and histogram for the 10000-bit kernel benchmark, wide view (x axis 8-70 ms).
results_part3(bits=10000, timer=p3timer10k, hist_xlim=(8, 70))
In [84]:
# Same 10000-bit kernel results, zoomed in (x axis 10-15 ms).
results_part3(bits=10000, timer=p3timer10k, hist_xlim=(10, 15))
In [ ]: